s4_220331_celltype_definition

nlc

02/08/2022

Experiment & contact info

PIs: Rosalie Sears (), Laura Heiser ()

Sample preparation: Zinab Doha ()

Library prep from single cells: Xi Li & Colin Daniel ()

Analysis: Nick Calistri ()

Sequencing performed by OHSU MPSSR

Analysis design

  • load rliger integrated data (s3_celltypes.rds)

  • separate each lineage with multiple clusters (epithelial, fibroblast, lymphoid, myeloid)

  • perform DE within each lineage

  • Convert cluster to celltype and annotate with relevant pathway/expression

  • Save integrated seurat obejct (s4_so_merge.rds) as well as the lineage subset seurat objects

Object names

epi = epithelial sni = stromal non-immune lym = lymphoid mye = myeloid

Set up

Load libraries

library(Matrix)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.1.2
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x tidyr::unpack() masks Matrix::unpack()
library(Seurat)
## Warning: package 'Seurat' was built under R version 4.1.2
## Registered S3 method overwritten by 'spatstat.geom':
##   method     from
##   print.boxx cli
## Attaching SeuratObject
library(clusterProfiler)
## 
## clusterProfiler v4.0.5  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
## 
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141. doi: 10.1016/j.xinn.2021.100141
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following object is masked from 'package:stats':
## 
##     filter
library(org.Mm.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.2
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following objects are masked from 'package:Matrix':
## 
##     expand, unname
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## The following object is masked from 'package:grDevices':
## 
##     windows
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 

Set seed

set.seed(4)

load s3_celltypes.rds

so_merge <- readRDS('analysis_files/s3_celltypes.rds')

so_merge@meta.data$barcode <- rownames(so_merge@meta.data)

# Ensure scale.data computed for all genes
if(nrow(so_merge@assays$RNA@scale.data) < nrow(so_merge@assays$RNA@counts)){
  
  print('Scaling all features')
  
  so_merge <- ScaleData(so_merge,
                        features = rownames(so_merge))
  
}else if(nrow(so_merge@assays$RNA@scale.data) == nrow(so_merge@assays$RNA@counts)){
  
  print('ScaleData already computed for all features')
  
}
## [1] "ScaleData already computed for all features"

Create subset seurat objects

so_epi <- subset(so_merge, subset = lineage == 'epithelial')

so_lym <- subset(so_merge, subset = lineage == 'lymphoid')

so_mye <- subset(so_merge, subset = lineage == 'myeloid')

so_fibro <- subset(so_merge, subset = lineage == 'fibroblast')

Prepare DEG function

subset_ego <- function(curr_sub, curr_markers){

  if(nrow(curr_markers) > 1){
    
    curr_top <- curr_markers %>%
      filter(avg_log2FC > 0.5) %>%
      group_by(cluster) %>%
      arrange(desc(avg_log2FC)) %>%
      filter(p_val_adj <= 0.05) %>%
      slice_head(n = 5)
    
    DoHeatmap(curr_sub, features = curr_top$gene)
    
    for(i in unique(curr_markers$cluster)){
      
      
      clust_markers <- curr_markers %>%
        filter(cluster == i)
      
      volcano_plot <- ggplot(clust_markers, aes(x = avg_log2FC, y = -log10(p_val_adj), label = gene))+
        geom_point()+
        ggrepel::geom_text_repel()+
        ggtitle(paste0('Cluster: ', i))
      
      print(volcano_plot)
      
      print(paste0('Evaluating cluster: ', i))
      curr_top <- curr_markers %>%
        filter(cluster == i) %>%
        filter(avg_log2FC >= 0.5) %>%
        filter(p_val_adj <= 0.05)
      
      curr_bottom <- curr_markers %>%
        filter(cluster == i) %>%
        filter(avg_log2FC <= -0.5) %>%
        filter(p_val_adj <= 0.05)
    
      ego_top <- enrichGO(gene = curr_top$gene,
                OrgDb = org.Mm.eg.db,
                keyType = 'SYMBOL',
                ont = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff = 0.01,
                qvalueCutoff = 0.05)
  
      if(if(!is.null(ego_top)){nrow(ego_top) > 0 & nrow(curr_top) >= 3}){
        ego_top_genelist <- curr_top$avg_log2FC
        names(ego_top_genelist) <- curr_top$gene
        ego_top_genelist <- sort(ego_top_genelist, decreasing = TRUE)
        
        p1 <- dotplot(ego_top, title = paste0('Cluster ', i))
        
        p2 <- cnetplot(ego_top,
                       showCategory = 10,
                       foldChange = ego_top_genelist,
                       colorEdge = TRUE)
        
        p3 <- heatplot(ego_top,
                       showCategory = 10,
                       foldChange = ego_top_genelist)
        
        print(p1)
        print(p2)
        print(p3)
    }else(
      print('No upregulated GOs found')
    )
      
    
    }
      
  }else{
    print('FindAllMarkers did not return DEGs (only one cluster in celltype?)')
  }  
}

Epithelial subset analysis (epi)

UMAP and DE within subset

so_epi <- RunUMAP(so_epi, reduction = 'iNMF', dims = 1:50)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 20:55:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:55:32 Read 2824 rows and found 50 numeric columns
## 20:55:32 Using Annoy for neighbor search, n_neighbors = 30
## 20:55:32 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:55:32 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file39143f522e53
## 20:55:32 Searching Annoy index using 1 thread, search_k = 3000
## 20:55:33 Annoy recall = 100%
## 20:55:33 Commencing smooth kNN distance calibration using 1 thread
## 20:55:34 Initializing from normalized Laplacian + noise
## 20:55:34 Commencing optimization for 500 epochs, with 121422 positive edges
## 20:55:40 Optimization finished
DimPlot(so_epi, label = TRUE)+
  coord_equal()

epi_markers <- FindAllMarkers(so_epi)
## Calculating cluster 2
## Calculating cluster 8
## Calculating cluster 10
## Calculating cluster 14.3
## Calculating cluster 19
write_csv(epi_markers, file = 'analysis_files/s4_epithelial_markers.csv')

epi_top <- epi_markers %>%
  group_by(cluster) %>%
  filter(p_val_adj <= 0.01) %>%
  arrange(desc(avg_log2FC)) %>%
  slice_head(n = 10)
  
# Heatmap of top markers
DoHeatmap(so_epi, features = epi_top$gene)

# Dendrogram of clusters
so_epi <- BuildClusterTree(so_epi)
PlotClusterTree(so_epi)

# Distribution of cells in each histology
ggplot(so_epi@meta.data, aes(x = cluster_l2, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~histology)

ggplot(so_epi@meta.data, aes(x = cluster_l2, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~cellfrac_type)

Cytokeratin profile of epithelial clusters

keratin_features <- grep(row.names(so_epi), pattern = '^Krt', value = TRUE)

keratin_aves <- AverageExpression(so_epi,
                                  features = keratin_features,
                                  assay = 'RNA')

keratin_aves_filtered <- keratin_aves$RNA[rowSums(keratin_aves$RNA) != 0,]
keratin_aves_top <- keratin_aves$RNA[rowSums(keratin_aves$RNA) >= quantile(rowSums(keratin_aves$RNA), 0.9), ]

pheatmap::pheatmap(keratin_aves_filtered)

pheatmap::pheatmap(keratin_aves_top)

DotPlot(so_epi,
        features = row.names(keratin_aves_top),
        cols = 'RdYlBu')+
  coord_flip()

EGO on DEGs

subset_ego(curr_sub = so_epi,
           curr_markers = epi_markers)
## Warning: ggrepel: 1025 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 2"

## Warning: ggrepel: 905 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 8"

## Warning: ggrepel: 1470 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 10"

## Warning: ggrepel: 400 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 14.3"

## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 1141 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 19"

CellMarkerDB markers

# Basal markers:
VlnPlot(so_epi, features = c('Acta2', 'Krt14', 'Krt5', 'Mylk', 'Vim'))

# Luminal Markers
VlnPlot(so_epi, features = c('Krt18', 'Krt19', 'Flot2', 'Cdh1'))

epi Celltype_l2

c2 - epi1_luminal_oxphos c8 - epi2_luminal c10 - epi3_basal c14.3 - epi4_proliferating c19 - epi5_luminal_ros-response

epi_dict <- tibble(cluster = sort(unique(so_epi@meta.data$cluster_l2)),
                   celltype = c('c2-luminal_oxphos',
                                   'c8-luminal_gland_development',
                                   'c10-basal_ecm_modulating',
                                   'c14.3-proliferating',
                                   'c19-luminal_ros_response'))

so_epi@meta.data$celltype_full <- plyr::mapvalues(x = so_epi@meta.data$cluster_l2,
                                          from = epi_dict$cluster,
                                          to = epi_dict$celltype)


DimPlot(so_epi,
        group.by = 'celltype_full',
        label = TRUE)+
  coord_equal()

# pheatmap of celltype
epi_freq_table <- so_epi@meta.data %>%
  dplyr::select(celltype_full, cellfrac_type) %>%
  table()

epi_freq_table <- epi_freq_table[rowSums(epi_freq_table) != 0,]

pheatmap::pheatmap(prop.table(epi_freq_table, margin = 2),
                   display_numbers = epi_freq_table)

Fibroblast subset analysis (fibro)

UMAP and DE within subset

so_fibro <- RunUMAP(so_fibro, reduction = 'iNMF', dims = 1:50,
                  seed.use = 500)
## 20:59:22 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:59:22 Read 1533 rows and found 50 numeric columns
## 20:59:22 Using Annoy for neighbor search, n_neighbors = 30
## 20:59:22 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:59:22 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file391437ae1f97
## 20:59:22 Searching Annoy index using 1 thread, search_k = 3000
## 20:59:23 Annoy recall = 100%
## 20:59:23 Commencing smooth kNN distance calibration using 1 thread
## 20:59:24 Initializing from normalized Laplacian + noise
## 20:59:24 Commencing optimization for 500 epochs, with 61640 positive edges
## 20:59:28 Optimization finished
DimPlot(so_fibro, label = TRUE)+
  coord_equal()

fibro_markers <- FindAllMarkers(so_fibro)
## Calculating cluster 7
## Calculating cluster 12
## Calculating cluster 17
write_csv(fibro_markers, file = 'analysis_files/s4_fibro_markers.csv')

fibro_top <- fibro_markers %>%
  group_by(cluster) %>%
  filter(p_val_adj <= 0.01) %>%
  arrange(desc(avg_log2FC)) %>%
  slice_head(n = 10)
  
# Heatmap of top markers
DoHeatmap(so_fibro, features = fibro_top$gene)

# Dendrogram of clusters
so_fibro <- BuildClusterTree(so_fibro)
PlotClusterTree(so_fibro)

# Distribution of cells in each histology
ggplot(so_fibro@meta.data, aes(x = seurat_clusters, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~histology)

ggplot(so_fibro@meta.data, aes(x = seurat_clusters, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~cellfrac_type)

EGO on DEGs

subset_ego(curr_sub = so_fibro,
           curr_markers = fibro_markers)
## Warning: ggrepel: 658 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 7"

## Warning: ggrepel: 1144 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 12"

## Warning: ggrepel: 954 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 17"

c7 - fibro1_ctla2a_high c12 - fibro2_col11_high c17 - fibro3_anti-motility

FeaturePlot(so_fibro,
            features = c('Mme', 'Ctla2a', 'Col11a1', 'Acta2'),
            label = TRUE)

fibro_dict <- tibble(cluster = sort(unique(so_fibro@meta.data$cluster_l2)),
                   celltype = c('c7-ctla2a_high',
                                   'c12-col11_high',
                                   'c17-anti_motility'))

so_fibro@meta.data$celltype_full<- plyr::mapvalues(x = so_fibro@meta.data$cluster_l2,
                                          from = fibro_dict$cluster,
                                          to = fibro_dict$celltype)


DimPlot(so_fibro,
        group.by = 'celltype_full',
        label = TRUE)+
  coord_equal()

# pheatmap of celltype_l2
fibro_freq_table <- so_fibro@meta.data %>%
  dplyr::select(celltype_full, cellfrac_type) %>%
  table()

fibro_freq_table <- fibro_freq_table[rowSums(fibro_freq_table) != 0,]

pheatmap::pheatmap(prop.table(fibro_freq_table, margin = 2),
                   display_numbers = fibro_freq_table)

Lymphoid subset analysis (lym)

UMAP and DE within subset

so_lym <- RunUMAP(so_lym, reduction = 'iNMF', dims = 1:50)
## 21:00:58 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:00:58 Read 3414 rows and found 50 numeric columns
## 21:00:58 Using Annoy for neighbor search, n_neighbors = 30
## 21:00:58 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:00:59 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file3914186808
## 21:00:59 Searching Annoy index using 1 thread, search_k = 3000
## 21:00:59 Annoy recall = 100%
## 21:01:00 Commencing smooth kNN distance calibration using 1 thread
## 21:01:01 Initializing from normalized Laplacian + noise
## 21:01:01 Commencing optimization for 500 epochs, with 163904 positive edges
## 21:01:09 Optimization finished
DimPlot(so_lym, label = TRUE)+
  coord_equal()

lym_markers <- FindAllMarkers(so_lym)
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 9
## Calculating cluster 13
## Calculating cluster 14.1
## Calculating cluster 21
## Calculating cluster 22
write_csv(lym_markers, file = 'analysis_files/s4_lymphoid_markers.csv')

lym_top <- lym_markers %>%
  group_by(cluster) %>%
  filter(p_val_adj <= 0.01) %>%
  arrange(desc(avg_log2FC)) %>%
  slice_head(n = 10)
  
# Heatmap of top markers
DoHeatmap(so_lym, features = lym_top$gene)

# Dendrogram of clusters
so_lym <- BuildClusterTree(so_lym)
PlotClusterTree(so_lym)

# Distribution of cells in each histology
ggplot(so_lym@meta.data, aes(x = cluster_l2, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~histology)

ggplot(so_lym@meta.data, aes(x = cluster_l2, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~cellfrac_type)

FeaturePlots

lym_moi <- c('Cd3e', 'Cd4', 'Trdc', 'Tcrg-C1', 'Cd8a', 'Foxp3', 'Mki67', 'Cd19', 'Cd79b', 'Ms4a1', 'Gzma')

DotPlot(so_lym,
        features = lym_moi,
        cols = 'RdYlBu')+
    coord_flip()

EGO on DEGs

subset_ego(curr_sub = so_lym,
           curr_markers = lym_markers)
## Warning: ggrepel: 235 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 5"

## Warning: ggrepel: 692 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 6"

## Warning: ggrepel: 547 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 9"

## Warning: ggrepel: 630 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 13"

## Warning: ggrepel: 1072 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 14.1"

## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 1119 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 21"

## Warning: ggrepel: 2 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## Warning: ggrepel: 853 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 22"

c5 - Cd4 T cell c6 - Gamma Delta T cells c9 - CD8 T cell c13 - Treg c14.1 - Proliferating lymphoid c21 - B cells (Ms4a1{Cd20}) c22 - NK (Gzma)

lym_dict <- tibble(cluster = sort(unique(so_lym@meta.data$cluster_l2)),
                   celltype = c('c5-Cd4_T',
                                'c6-GammaDelta_T',
                                'c9-Cd8_T',
                                'c13-Treg',
                                'c14.1-proliferating',
                                'c21-B',
                                'c22-NK'))

so_lym@meta.data$celltype_full <- plyr::mapvalues(x = so_lym@meta.data$cluster_l2,
                                          from = lym_dict$cluster,
                                          to = lym_dict$celltype)


DimPlot(so_lym,
        group.by = 'celltype_full',
        label = TRUE)+
  coord_equal()

# pheatmap of celltype_full
lym_freq_table <- so_lym@meta.data %>%
  dplyr::select(celltype_full, cellfrac_type) %>%
  table()

lym_freq_table <- lym_freq_table[rowSums(lym_freq_table) != 0,]

pheatmap::pheatmap(prop.table(lym_freq_table, margin = 2),
                   display_numbers = lym_freq_table)

DotPlot(so_lym,
        features = lym_moi,
        cols = 'RdYlBu',
        group.by = 'celltype_full')+
  theme_bw()+
  coord_flip()+
  RotatedAxis()

myeloid subset analysis (imm)

UMAP and DE within subset

so_mye <- RunUMAP(so_mye, reduction = 'iNMF', dims = 1:50)
## 21:05:23 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:05:23 Read 5706 rows and found 50 numeric columns
## 21:05:23 Using Annoy for neighbor search, n_neighbors = 30
## 21:05:23 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:05:24 Writing NN index file to temp file C:\Users\Nick\AppData\Local\Temp\Rtmpw3REP7\file3914464e3059
## 21:05:24 Searching Annoy index using 1 thread, search_k = 3000
## 21:05:25 Annoy recall = 100%
## 21:05:26 Commencing smooth kNN distance calibration using 1 thread
## 21:05:27 Initializing from normalized Laplacian + noise
## 21:05:27 Commencing optimization for 500 epochs, with 269200 positive edges
## 21:05:40 Optimization finished
DimPlot(so_mye, label = TRUE)+
  coord_equal()

mye_markers <- FindAllMarkers(so_mye)
## Calculating cluster 1
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 14.2
## Calculating cluster 15
## Calculating cluster 16
## Calculating cluster 18
## Calculating cluster 20
## Calculating cluster 24
write_csv(mye_markers, file = 'analysis_files/s4_myeloid_markers.csv')

mye_top <- mye_markers %>%
  group_by(cluster) %>%
  filter(p_val_adj <= 0.01) %>%
  arrange(desc(avg_log2FC)) %>%
  slice_head(n = 10)
  
# Heatmap of top markers
DoHeatmap(so_mye, features = mye_top$gene)

# Dendrogram of clusters
so_mye <- BuildClusterTree(so_mye)
PlotClusterTree(so_mye)

# Distribution of cells in each histology
ggplot(so_mye@meta.data, aes(x = cluster_l2, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~histology)

ggplot(so_mye@meta.data, aes(x = cluster_l2, fill = histology))+
  geom_bar()+
  theme_bw()+
  facet_wrap(~cellfrac_type)

EGO on DEGs

subset_ego(curr_sub = so_mye,
           curr_markers = mye_markers)
## Warning: ggrepel: 1738 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 1"

## Warning: ggrepel: 1919 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 3"

## Warning: ggrepel: 1757 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 4"

## Warning: ggrepel: 1554 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 14.2"

## Warning: ggrepel: 1170 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 15"

## Warning: ggrepel: 1000 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 16"

## Warning: ggrepel: 321 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 18"

## Warning: ggrepel: 1367 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 20"

## Warning: ggrepel: 1375 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## [1] "Evaluating cluster: 24"

c1 - Macrophage c3 - Neutrophil Lrg high (S100a8/9 high, low nFeature) c4 - Neutrophil Ccl{3,4} high (S100a8/9 high, low nFeature) c14.2 - Proliferating c15 - cDC2 (Cd209a) c16 - Monocyte (Plac8, Ly6c2) c18 - IFN response c20 - cDC1 (Clec9a, Cadm1) c24 - DC

mye_moi <- c('Cd14', 'Cd68', 'Cd74', 'Itgam', 'Itgax', 'Mki67', 'S100a8', 'S100a9', 'Ccl3', 'Ccl4', 'Cd209a', 'Plac8', 'Ly6c2', 'Isg15', 'Clec9a', 'Cadm1')

DotPlot(so_mye,
        features = mye_moi,
        cols = 'RdYlBu')+
  theme_bw()+
  coord_flip()

mye_dict <- tibble(cluster = sort(unique(so_mye@meta.data$cluster_l2)),
                   celltype = c('c1-Macrophage',
                                   'c3-Neutrophil_Lrg_high',
                                   'c4-Neutrophil_CCL3_high',
                                   'c14.2-proliferating',
                                   'c15-cDC2',
                                   'c16-Monocyte',
                                   'c18-IFN_response',
                                   'c20-cDC1',
                                   'c24-DC'))

so_mye@meta.data$celltype_full <- plyr::mapvalues(x = so_mye@meta.data$cluster_l2,
                                          from = mye_dict$cluster,
                                          to = mye_dict$celltype)


DimPlot(so_mye,
        group.by = 'celltype_full',
        label = TRUE)+
  coord_equal()

# pheatmap of celltype_l2
mye_freq_table <- so_mye@meta.data %>%
  dplyr::select(celltype_full, cellfrac_type) %>%
  table()

mye_freq_table <- mye_freq_table[rowSums(mye_freq_table) != 0,]

pheatmap::pheatmap(prop.table(mye_freq_table, margin = 2),
                   display_numbers = mye_freq_table)

DotPlot(so_mye,
        features = mye_moi,
        cols = 'RdYlBu',
        group.by = 'celltype_full')+
  theme_bw()+
  coord_flip()+
  RotatedAxis()

ggplot(so_mye@meta.data, aes(x = celltype_full, fill = tumor))+
  geom_bar()+
  theme_bw()+
  RotatedAxis()

save output

Add celltype_l2 to so_merge

celltype_full_dict <- rbind(epi_dict,
                            fibro_dict,
                            lym_dict,
                            mye_dict,
                            c(11, 'c11-endothelial'),
                            c(23, 'c23-perivascular'))

# Add Seurat metadata 
so_merge@meta.data$celltype_full <- plyr::mapvalues(x = so_merge@meta.data$cluster_l2,
                                              from = celltype_full_dict$cluster,
                                              to = celltype_full_dict$celltype)


# correct rownames as barcodes
rownames(so_merge@meta.data) <- so_merge@meta.data$barcode

DimPlot(so_merge,
        group.by = 'celltype_full',
        label = TRUE,
        label.size = 5)+
  theme(legend.position = 'none')+
  coord_equal()

Save seurat objects as .rds files

saveRDS(so_merge, file = 'analysis_files/s4_so_merge.rds')

sessionInfo()

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252 
## [2] LC_CTYPE=English_United States.1252   
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] org.Mm.eg.db_3.13.0   AnnotationDbi_1.56.2  IRanges_2.28.0       
##  [4] S4Vectors_0.32.3      Biobase_2.54.0        BiocGenerics_0.40.0  
##  [7] clusterProfiler_4.0.5 SeuratObject_4.0.4    Seurat_4.1.0         
## [10] forcats_0.5.1         stringr_1.4.0         dplyr_1.0.8          
## [13] purrr_0.3.4           readr_2.1.2           tidyr_1.2.0          
## [16] tibble_3.1.6          ggplot2_3.3.5         tidyverse_1.3.1      
## [19] Matrix_1.3-4         
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2             reticulate_1.24        tidyselect_1.1.2      
##   [4] RSQLite_2.2.9          htmlwidgets_1.5.4      grid_4.1.1            
##   [7] BiocParallel_1.28.3    Rtsne_0.15             scatterpie_0.1.7      
##  [10] munsell_0.5.0          codetools_0.2-18       ica_1.0-2             
##  [13] future_1.24.0          miniUI_0.1.1.1         withr_2.5.0           
##  [16] spatstat.random_2.1-0  colorspace_2.0-3       GOSemSim_2.18.1       
##  [19] highr_0.9              knitr_1.38             rstudioapi_0.13       
##  [22] ROCR_1.0-11            tensor_1.5             DOSE_3.18.3           
##  [25] listenv_0.8.0          labeling_0.4.2         GenomeInfoDbData_1.2.7
##  [28] polyclip_1.10-0        pheatmap_1.0.12        farver_2.1.0          
##  [31] bit64_4.0.5            downloader_0.4         treeio_1.16.2         
##  [34] parallelly_1.30.0      vctrs_0.3.8            generics_0.1.2        
##  [37] xfun_0.30              R6_2.5.1               GenomeInfoDb_1.30.1   
##  [40] graphlayouts_0.7.1     rmdformats_1.0.3       gridGraphics_0.5-1    
##  [43] bitops_1.0-7           spatstat.utils_2.3-0   cachem_1.0.6          
##  [46] fgsea_1.18.0           assertthat_0.2.1       vroom_1.5.7           
##  [49] promises_1.2.0.1       scales_1.1.1           ggraph_2.0.5          
##  [52] enrichplot_1.12.3      gtable_0.3.0           globals_0.14.0        
##  [55] goftest_1.2-3          tidygraph_1.2.0        rlang_1.0.1           
##  [58] splines_4.1.1          lazyeval_0.2.2         spatstat.geom_2.3-2   
##  [61] broom_0.7.12           yaml_2.3.5             reshape2_1.4.4        
##  [64] abind_1.4-5            modelr_0.1.8           backports_1.3.0       
##  [67] httpuv_1.6.5           qvalue_2.24.0          tools_4.1.1           
##  [70] bookdown_0.24          ggplotify_0.1.0        ellipsis_0.3.2        
##  [73] spatstat.core_2.4-0    jquerylib_0.1.4        RColorBrewer_1.1-2    
##  [76] ggridges_0.5.3         Rcpp_1.0.7             plyr_1.8.6            
##  [79] zlibbioc_1.40.0        RCurl_1.98-1.5         rpart_4.1-15          
##  [82] deldir_1.0-6           viridis_0.6.2          pbapply_1.5-0         
##  [85] cowplot_1.1.1          zoo_1.8-9              haven_2.4.3           
##  [88] ggrepel_0.9.1          cluster_2.1.2          fs_1.5.2              
##  [91] magrittr_2.0.1         RSpectra_0.16-0        data.table_1.14.2     
##  [94] scattermore_0.8        DO.db_2.9              lmtest_0.9-39         
##  [97] reprex_2.0.1           RANN_2.6.1             ggnewscale_0.4.6      
## [100] fitdistrplus_1.1-6     matrixStats_0.61.0     hms_1.1.1             
## [103] patchwork_1.1.1        mime_0.12              evaluate_0.15         
## [106] xtable_1.8-4           readxl_1.3.1           gridExtra_2.3         
## [109] compiler_4.1.1         shadowtext_0.1.1       KernSmooth_2.23-20    
## [112] crayon_1.5.0           htmltools_0.5.2        ggfun_0.0.5           
## [115] mgcv_1.8-36            later_1.3.0            tzdb_0.2.0            
## [118] aplot_0.1.2            lubridate_1.8.0        DBI_1.1.2             
## [121] tweenr_1.0.2           dbplyr_2.1.1           MASS_7.3-54           
## [124] cli_3.1.1              parallel_4.1.1         igraph_1.2.7          
## [127] pkgconfig_2.0.3        plotly_4.10.0          spatstat.sparse_2.1-0 
## [130] xml2_1.3.3             ggtree_3.0.4           bslib_0.3.1           
## [133] XVector_0.34.0         rvest_1.0.2            yulab.utils_0.0.4     
## [136] digest_0.6.27          sctransform_0.3.3      RcppAnnoy_0.0.19      
## [139] spatstat.data_2.1-2    Biostrings_2.62.0      rmarkdown_2.13        
## [142] cellranger_1.1.0       leiden_0.3.9           fastmatch_1.1-3       
## [145] tidytree_0.3.8         uwot_0.1.11            shiny_1.7.1           
## [148] lifecycle_1.0.1        nlme_3.1-152           jsonlite_1.7.2        
## [151] limma_3.48.3           viridisLite_0.4.0      fansi_1.0.2           
## [154] pillar_1.7.0           lattice_0.20-44        KEGGREST_1.34.0       
## [157] fastmap_1.1.0          httr_1.4.2             survival_3.2-11       
## [160] GO.db_3.13.0           glue_1.6.1             png_0.1-7             
## [163] bit_4.0.4              ggforce_0.3.3          stringi_1.7.6         
## [166] sass_0.4.0             blob_1.2.2             memoise_2.0.1         
## [169] ape_5.5                irlba_2.3.5            future.apply_1.8.1